# -*- coding: utf-8 -*-
import json
from urllib.parse import parse_qs, urlsplit, urlunsplit

from scrapy import Spider, Request
from zhihuuser.items import UserItem


# Followees list (accounts that a user follows)
# url='https://www.zhihu.com/api/v4/members/excited-vczh/followees?include=data%5B*%5D.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics&offset=20&limit=20'
# Detailed profile of a single follower
# https://www.zhihu.com/api/v4/members/tyh894520?include=allow_message%2Cis_followed%2Cis_following%2Cis_org%2Cis_blocking%2Cemployments%2Canswer_count%2Cfollower_count%2Carticles_count%2Cgender%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics
# Followers list (accounts that follow a user)
# url = 'https://www.zhihu.com/api/v4/members/excited-vczh/followers?include=data%5B*%5D.answer_count%2Carticles_count%2Cgender%2Cfollower_count%2Cis_followed%2Cis_following%2Cbadge%5B%3F(type%3Dbest_answerer)%5D.topics&offset=20&limit=20'


class ZhihuSpider(Spider):
    """Crawl Zhihu user profiles via the ``/api/v4/members`` endpoints.

    The crawl starts from ``start_user``: it requests that user's profile and
    the first page of their followers.  Every follower found is requested as a
    profile (yielding a ``UserItem``), and the followers list is followed page
    by page until the API reports ``paging.is_end``.

    A followees variant (``/members/{user}/followees``, same query
    parameters) used to live here as commented-out code; only followers are
    crawled.
    """

    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']

    # start_requests() below builds the initial requests itself.
    start_urls = ['http://www.zhihu.com/']

    # Seed account the crawl starts from (its url_token).
    start_user = 'kaifulee'

    # Profile endpoint and the fields requested through ``include``.
    user_url = "https://www.zhihu.com/api/v4/members/{user}?include={include}"
    user_query = 'allow_message,is_followed,is_following,is_org,is_blocking,employments,answer_count,follower_count,articles_count,gender,badge[?(type=best_answerer)].topics'

    # Followers list endpoint and the per-entry fields requested.
    followers_url = 'https://www.zhihu.com/api/v4/members/{user}/followers?include={include}&offset={offset}&limit={limit}'
    followers_query = 'data[*].answer_count,articles_count,gender,follower_count,is_followed,is_following,badge[?(type=best_answerer)].topics'

    # Pieces used to normalise the "next page" link returned by the API.
    _API_SCHEME = 'https'
    _API_HOST = 'www.zhihu.com'
    _API_PREFIX = '/api/v4'

    def start_requests(self):
        """Yield the seed user's profile request and first followers page."""
        yield Request(
            self.user_url.format(user=self.start_user, include=self.user_query),
            self.parse_user)
        yield Request(
            self.followers_url.format(user=self.start_user, include=self.followers_query, limit=20, offset=0),
            callback=self.parse_followers)

    def parse_user(self, response):
        """Build a ``UserItem`` from a profile response.

        Only keys that are both declared on ``UserItem`` and present in the
        JSON payload are copied.
        """
        result = json.loads(response.text)
        item = UserItem()
        for field in item.fields:
            if field in result:
                item[field] = result[field]
        yield item

    def parse_followers(self, response):
        """Yield a profile request per follower and a request for the next page.

        Entries without a ``url_token`` are skipped, and pagination stops when
        ``paging.is_end`` is not ``False`` or no ``next`` link is provided.
        """
        results = json.loads(response.text)

        for follower in results.get('data') or []:
            url_token = follower.get('url_token')
            if not url_token:
                # Without a token there is no profile URL to build.
                continue
            yield Request(
                self.user_url.format(user=url_token, include=self.user_query),
                callback=self.parse_user)

        paging = results.get('paging') or {}
        next_page = paging.get('next')
        if paging.get('is_end') is False and next_page:
            next_url = self._to_api_url(next_page)
            page = self._page_number(next_url)
            # Fall back to the URL itself when offset/limit cannot be read.
            self.logger.info('爬取到第几页了：%s', page if page is not None else next_url)
            yield Request(next_url, callback=self.parse_followers)

    @classmethod
    def _to_api_url(cls, url):
        """Return ``url`` rewritten to ``https://www.zhihu.com/api/v4/...``.

        The path and query string are kept; the scheme and host are forced
        and the ``/api/v4`` prefix is inserted only when it is missing.
        Presumably the API returns the ``next`` link without that prefix,
        which is why it has to be re-added.
        """
        parts = urlsplit(url)
        path = parts.path
        if not path.startswith('/'):
            path = '/' + path
        has_prefix = path == cls._API_PREFIX or path.startswith(cls._API_PREFIX + '/')
        if not has_prefix:
            path = cls._API_PREFIX + path
        return urlunsplit(
            (cls._API_SCHEME, cls._API_HOST, path, parts.query, parts.fragment))

    @staticmethod
    def _page_number(url):
        """Return the 1-based page index encoded by ``offset``/``limit``.

        Returns ``None`` when either parameter is absent, non-numeric, or
        ``limit`` is not positive.
        """
        query = parse_qs(urlsplit(url).query)
        try:
            offset = int(query['offset'][0])
            limit = int(query['limit'][0])
        except (KeyError, ValueError):
            return None
        if limit <= 0:
            return None
        return offset // limit + 1
